# Console history: the merge script is run repeatedly while it is being developed.
source('~/Desktop/service/data/US_historical/merge.R')
source('~/Desktop/service/data/US_historical/merge.R')
# `dir` is not assigned anywhere above this line, so it has to exist in the
# session already (it is set with here() further down in this history).
setwd(dir)
# Write `result` to the working directory.
# NOTE(review): `result` is presumably the table left behind by merge.R - confirm.
fwrite(result, "merged_data.csv")
source('~/Desktop/service/data/US_historical/merge.R')
# Read every file of the working directory and store each table in a variable
# named after its file (file name without the ".csv" extension).
files = list.files()
# Strip only a trailing, literal ".csv": the dot is escaped and the pattern is
# anchored, so names that merely contain "csv" somewhere else stay intact.
names = sub("\\.csv$", "", files)
# seq_along() yields an empty sequence for an empty directory, whereas
# 1:length(files) would iterate over c(1, 0) and fail inside fread().
for(i in seq_along(files)){
  temp = fread(files[i])
  assign(names[i], temp)
}
# 1800 - 1920
#############################################################
# extrapolate missing values
#
# Fits a log-linear trend, log(y.in) ~ x.in, by least squares and evaluates the
# fitted curve at x.out.
#
# x.in   numeric vector with the positions (e.g. years) of the observations
# y.in   numeric vector of observed values, same length as x.in; entries that
#        are missing, non-finite, zero or negative cannot be used on the log
#        scale and are left out of the fit
# x.out  numeric vector of positions at which to predict
#
# Returns the predictions rounded to whole numbers, one per element of x.out.
# Stops with an error when fewer than two usable observations remain.
extrapolate = function(x.in, y.in, x.out){
  stopifnot(length(x.in) == length(y.in))
  # log() of zero or a negative number is -Inf / NaN and makes lm() abort,
  # so only finite, strictly positive observations enter the regression
  usable = is.finite(x.in) & is.finite(y.in) & y.in > 0
  if (sum(usable) < 2) {
    stop("extrapolate() needs at least two positive, non-missing observations",
         call. = FALSE)
  }
  obs = data.frame(x = x.in[usable], y = log(y.in[usable]))
  r = lm(y ~ x, data = obs)
  # predict on the log scale, then transform back and round
  p = round(exp(predict(r, newdata = data.frame(x = x.out))))
  return(p)
}
# Fill gaps in BA814_830 with log-linear estimates fitted on its first ten rows.
x.in = BA814_830$Year[1:10]

# construction: estimates for 1800, 1810, 1820 and 1830 go into rows 1 to 4
y.in = BA814_830$Construction[1:10]
x.out = seq(from = 1800, to = 1830, by = 10)
BA814_830$Construction[1:4] = extrapolate(x.in, y.in, x.out)

# manufacturing: estimates for 1800, 1820 and 1830 go into rows 1, 3 and 4
y.in = BA814_830$Manufacturing[1:10]
x.out = c(1800, 1820, 1830)
BA814_830$Manufacturing[c(1, 3, 4)] = extrapolate(x.in, y.in, x.out)

# aggregate into agriculture / industry / services
year = BA814_830$Year
total = BA814_830$Total
agr = BA814_830$Agriculture + BA814_830$Fishing
ind = BA814_830$Mining + BA814_830$Construction + BA814_830$Manufacturing
# services are the remainder of the total
srv = total - agr - ind
# relative gap between the sum of the three sectors and the total
error = (agr + ind + srv - total) / total
# keep the years up to and including 1920 for this source
d1 = data.table(year = year, agr = agr, ind = ind, srv = srv,
                total = total, source = "d1")[year <= 1920]
# 1920 - 1929
###########################################################
# Combines two tables: BA470_477 (farm and total employment) and
# BA840_848 (industry and the remainder), matched on their common years.
year.agr = BA470_477$Year
year.srv = BA840_848$Year
year = intersect(year.agr, year.srv)
# row selectors for the years present in both tables
keep.agr = year.agr %in% year
keep.srv = year.srv %in% year
# aggregate
agr = BA470_477$Farm[keep.agr]
total = BA470_477$Employed[keep.agr]
ind = (BA840_848$Mining + BA840_848$Construction + BA840_848$Manufacturing)[keep.srv]
# services: total of BA840_848 minus its industry part
srv = BA840_848$Total[keep.srv] - ind
# relative gap between the sum of the three sectors and the employed total
error = (agr + ind + srv - total) / total
# keep the years from 1920 onwards for this source
d2 = data.table(year = year, agr = agr, ind = ind, srv = srv,
                total = total, source = "d2")[year >= 1920]
# 1929 - 1948
###########################################################
# NIPA_6.8A grouped into agriculture / industry / services
total = NIPA_6.8A$Total
year = NIPA_6.8A$Year
# agriculture: farms plus agricultural services, forestry and fisheries
agr = NIPA_6.8A$Farms +
  NIPA_6.8A$`Agricultural services, forestry, and fisheries `
# industry: mining, construction, durable and nondurable goods, utilities
ind = NIPA_6.8A$Mining +
  NIPA_6.8A$`Contract construction` +
  NIPA_6.8A$`Durable goods` +
  NIPA_6.8A$`Nondurable goods` +
  NIPA_6.8A$`Gas, electric, and sanitary services `
# services are the remainder of the total
srv = total - agr - ind
# relative gap between the sum of the three sectors and the total
error = (agr + ind + srv - total) / total
d3 = data.table(year = year, agr = agr, ind = ind, srv = srv,
                total = total, source = "d3")
# 1948 - 1987
########################################################
# NIPA_6.8B grouped into agriculture / industry / services
total = NIPA_6.8B$`Persons engaged in production`
year = NIPA_6.8B$year
agr = NIPA_6.8B$`Agriculture forestry and fishing`
# industry: mining, construction, manufacturing and utilities
ind = NIPA_6.8B$Mining +
  NIPA_6.8B$Construction +
  NIPA_6.8B$Manufacturing +
  NIPA_6.8B$`Electric gas and sanitary services`
# services are the remainder of the total
srv = total - agr - ind
# relative gap between the sum of the three sectors and the total
error = (agr + ind + srv - total) / total
d4 = data.table(year = year, agr = agr, ind = ind, srv = srv,
                total = total, source = "d4")
# 1987 - 2000
########################################################
# NIPA_6.8C grouped into agriculture / industry / services
# (some column names of this table carry leading blanks)
total = NIPA_6.8C$`Persons engaged in production`
year = NIPA_6.8C$Year
agr = NIPA_6.8C$`    Agriculture, forestry, and fishing`
# industry: mining, construction, manufacturing and utilities
ind = NIPA_6.8C$Mining +
  NIPA_6.8C$Construction +
  NIPA_6.8C$Manufacturing +
  NIPA_6.8C$`      Electric, gas, and sanitary services`
# services are the remainder of the total
srv = total - agr - ind
# relative gap between the sum of the three sectors and the total
error = (agr + ind + srv - total) / total
d5 = data.table(year = year, agr = agr, ind = ind, srv = srv,
                total = total, source = "d5")
# 1998 - 2016
########################################################
# NIPA_6.8D grouped into agriculture / industry / services
# (the agriculture column name of this table carries leading blanks)
total = NIPA_6.8D$`Persons engaged in production1`
year = NIPA_6.8D$Year
agr = NIPA_6.8D$`        Agriculture, forestry, fishing, and hunting`
# industry: mining, construction, manufacturing and utilities
ind = NIPA_6.8D$Mining +
  NIPA_6.8D$Construction +
  NIPA_6.8D$Manufacturing +
  NIPA_6.8D$Utilities
# services are the remainder of the total
srv = total - ind - agr
# relative gap between the sum of the three sectors and the total
error = (agr + ind + srv - total) / total
d6 = data.table(year = year, agr = agr, ind = ind, srv = srv,
                total = total, source = "d6")
# merge
# stack the six period tables, d1 first and d6 last
pieces = list(d1, d2, d3, d4, d5, d6)
result = do.call(rbind, pieces)
# clear the workspace, keeping nothing but the merged table
rm(list = ls()[ls() != "result"])
# Console history: set up the session and run the splice script repeatedly.
dir = here()
# NOTE(review): `wd` is not assigned in the visible lines, only `dir` is -
# possibly setwd(dir) was meant; confirm.
setwd(wd)
# Reload the merged table written earlier with fwrite()
result = fread("merged_data.csv")
library(foreach)
source('~/Desktop/service/data/US_historical/splice.R')
# Inspect `splice` (presumably created by splice.R - confirm)
View(splice)
source('~/Desktop/service/data/US_historical/splice.R')
source('~/Desktop/service/data/US_historical/splice.R')
source('~/Desktop/service/data/US_historical/splice.R')
# Shut down the cluster `cl`; it is entered twice in a row in this history
stopCluster(cl)
stopCluster(cl)
source('~/Desktop/service/data/US_historical/splice.R')
source('~/Desktop/service/data/US_historical/splice.R')
# Print the loop variables of the splice loop for inspection
iteration
year.start
# Candidate start years: every distinct year of `result` except the first and
# the last entry. A single negative index vector is used because `sample.year`
# is a plain vector (one dimension); passing two comma-separated indices would
# be an error.
sample.year = unique(result$year)
sample.year = sample.year[-c(1, length(sample.year))]
# splice randomly
########################################################
# Each iteration builds one continuous series: it picks a random starting row
# of `result` and extends it backwards and forwards one year at a time, using
# the year-on-year growth rate of a randomly chosen source that covers both
# years of the step.
# NOTE(review): with the foreach call commented out, `final` is overwritten in
# every iteration and nothing is collected across iterations.
n.iteration = 100
# NOTE(review): the cluster is created and registered, but the loop below is a
# plain for loop, so it does not use the workers.
cl <- makeCluster(8, type="SOCK")
registerDoSNOW(cl)
clusterSetupRNG (cl, type = "RNGstream")
beginTime <- proc.time()
pb <- txtProgressBar(max = n.iteration, style = 3)
progress <- function(n) setTxtProgressBar(pb, n)
opts <- list(progress=progress)
#splice =  foreach ( iteration = 1:n.iteration, .options.snow=opts, .combine = rbind,
#                   .packages=c('data.table', 'magrittr')) %dopar%  {
for(iteration in seq_len(n.iteration)){
  # starting point: a random year, then a random row among the rows of that year
  year.start = sample(result$year, 1)
  year.back = result[year <= year.start]$year %>% sort(., decreasing = TRUE) %>% unique()
  year.forward = result[year >= year.start]$year %>% unique()
  start.sub = result[year == year.start]
  start.sub = start.sub[ sample(nrow(start.sub), 1)   ]
  # columns 2:5 hold the values that are spliced
  # (presumably agr, ind, srv, total - confirm against merged_data.csv)
  output = start.sub[,2:5]
  # back
  l = length(year.back) -1
  # seq_len(l) is empty when the start year has no earlier year; 1:l would
  # iterate over c(1, 0) and fail when sampling a source from an empty set
  for(i in seq_len(l)){
    year.sub = year.back[c(i, i + 1)]
    sub = result[year %in% year.sub]
    # keep the sources that have a row for both years of this step
    n = data.table(table(sub$source))
    n = n[N > 1]
    source.sample = sample(n$V1, 1)
    sub = sub[source == source.sample]
    # growth factor from the second row to the first row of the pair
    # NOTE(review): assumes the earlier year comes first in `result` - confirm
    gr = sub[1, 2:5] / sub[2, 2:5]
    new = output[1] * gr
    output = rbind(new, output)
  }
  output.back = cbind(year = sort(year.back), output)
  # forward
  output = start.sub[,2:5]
  l = length(year.forward) -1
  # same guard as above for a start year that has no later year
  for(i in seq_len(l)){
    year.sub = year.forward[c(i, i + 1)]
    sub = result[year %in% year.sub]
    n = data.table(table(sub$source))
    n = n[N > 1]
    source.sample = sample(n$V1, 1)
    sub = sub[source == source.sample]
    # growth factor from the first row to the second row of the pair
    gr = sub[2, 2:5] / sub[1, 2:5]
    new = tail(output,1) * gr
    output = rbind(output, new)
  }
  output.forward = cbind(year = year.forward, output)
  # the start year is part of both halves, so it appears twice in `final`
  final = rbind(output.back, output.forward)
}
stopCluster(cl)
# Reload the merged table and list the candidate start years: every distinct
# year except the first and the last entry.
result = fread("merged_data.csv")
sample.year = unique(result$year)
# `remove` holds positive positions so that `-remove` drops them; with negative
# positions `-remove` would select exactly those two entries instead.
remove = c(1, length(sample.year) )
sample.year = sample.year[-remove]
# Console history: re-run the splice script and look at the resulting series.
source('~/Desktop/service/data/US_historical/splice.R')
stopCluster(cl)
source('~/Desktop/service/data/US_historical/splice.R')
source('~/Desktop/service/data/US_historical/splice.R')
# `final` is the series assembled inside the splice loop
plot(final$year, final$agr)
source('~/Desktop/service/data/US_historical/splice.R')
# One scatter plot per column of `splice` against the year
plot(splice$year, splice$ind)
plot(splice$year, splice$srv)
plot(splice$year, splice$total)
View(splice)
# Row-wise sum of columns 2 to 4 of `splice`
# (presumably agr, ind and srv - confirm the column order)
splice$total.series = rowSums(splice[, 2:4 ])
View(splice)
# Compare the spliced total with the sum of the spliced sectors,
# first on linear axes, then on log-log axes
plot(splice$total, splice$total.series)
plot(splice$total, splice$total.series, log = "xy")
# Sector shares in percent of the summed series
splice$agr_frac = splice$agr/splice$total.series*100
View(splice)
splice$ind_frac = splice$ind/splice$total.series*100
splice$srv_frac = splice$srv/splice$total.series*100
# Plot the agriculture share, then add the industry (red) and services (blue)
# shares to the same plot
plot(splice$year, splice$agr_frac)
points(splice$year, splice$ind_frac, col = "red")
points(splice$year, splice$srv_frac, col = "blue")
View(splice)
# Save the table including the derived columns
fwrite(splice, "splice_results.csv")
# Console history: further runs of the splice script and of a figure script.
source('~/Desktop/service/data/US_historical/splice.R')
source('~/Desktop/service/figures/sector_energy_plot.R')
# Store the path returned by here() and print it
dir = here()
dir
source('~/Desktop/service/data/US_historical/splice.R')
